import pandas as pd

# Import data
df = pd.read_excel('../original/2020PrecinctAudits.xlsx')

# Add necessary columns
df['state'] = 'IOWA'
df['date']='2020-11-03'
df['office']='US PRESIDENT'

# Rename necessary columns
df.rename(columns={'COUNTY': 'county'}, inplace = True)
df.rename(columns={'PRECINCT': 'precinct'}, inplace = True)

# Make all county names upper case
df['county'] = df['county'].str.upper()

# Remove "#" signs in variable "precinct"
df['precinct'] = df['precinct'].str.replace('#','')

df['mode'] = 'IN-PERSON'
df.loc[df.precinct == 'Absentee', 'mode'] = 'ABSENTEE'

# Rename original and audited votes column names
df.rename(columns={'ELECTION NIGHT COUNT OF VOTES - TALLY BOOK': 'ballots'}, inplace = True)
df.rename(columns={'AUDIT BOARD COUNT OF VOTES': 'audited_ballots'}, inplace = True)

# Calculate the difference
df['issues'] = abs(df['ballots']-df['audited_ballots'])

# Drop unnecessary columns
df = df[['state', 'date', 'office', 'county', 'precinct', 'ballots', 'issues', 'mode']]

# Save the work
df.to_csv('../ready/Iowa_cleaned.csv', index=False)
